###################################################################################################
# Primary Election Mechanism ######################################################################
###################################################################################################

# load multipanel table function

  # Load the project helper file; per this script's comments it provides the
  # multi_panel_table() function used at the bottom to write the table.
  source("./7.multi_panel_table_function.R")

  # Re-attach dplyr so that it sits at the front of the search path.
  # NOTE(review): presumably this keeps a package attached by the sourced file
  # (e.g. plyr) from masking dplyr verbs such as summarise() -- confirm.
  # detach() raises an error when the package is not attached, so only call it
  # when dplyr is actually on the search path.
  if ("package:dplyr" %in% search()) {
    detach("package:dplyr", unload = TRUE)
  }

  # Install dplyr only when it is missing.
  if (!requireNamespace("dplyr", quietly = TRUE)) {
    install.packages("dplyr")
  }

  # library() stops with an error if dplyr still cannot be loaded; require()
  # would only return FALSE and let the script fail later at the first pipe.
  library("dplyr", quietly = TRUE)

# legislator data ###################################

  # Read the legislator-level roll-call files for the two eras and tag each
  # with an era label ("early"/"late") and a numeric era code (1/2).
  # The South factor columns stay on prcleg10/prcleg30 but are not carried into
  # the stacked, subsetted prcleg below.
  prcleg10 <- read.csv("./better_prohibition_rollcall_agdat_legislator_1910.csv")
  prcleg10$era <- "early"
  prcleg10$era_num <- 1
  prcleg10$South <- as.factor(prcleg10$south)

  prcleg30 <- read.csv("./better_prohibition_rollcall_agdat_legislator_1930.csv")
  prcleg30$era <- "late"
  prcleg30$era_num <- 2
  prcleg30$South <- as.factor(prcleg30$south)

  # Stack both eras and make the district id era-specific by appending the era.
  prcleg <- rbind(prcleg10, prcleg30)
  prcleg$distid <- paste(prcleg$distid, prcleg$era, sep = "_")

  # pull out the first congress and the last congress for their district
  #
  # The era label is stripped again (leaving the trailing "_") and the two
  # congress numbers are read from fixed positions counted from the end.
  # NOTE(review): assumes every distid ends in two two-digit congress numbers
  # separated by a single character -- confirm against the input files.
  distid_no_era <- gsub("early", "",
                        gsub("late", "", prcleg$distid, fixed = TRUE),
                        fixed = TRUE)

  # Store the bounds as integers: the merge step compares them with the numeric
  # congress of each election, and character values would make that comparison
  # lexicographic instead of numeric. stringr:: is spelled out so this does not
  # depend on stringr having been attached elsewhere.
  prcleg$cong1 <- as.integer(stringr::str_sub(distid_no_era, -6, -5))
  prcleg$cong2 <- as.integer(stringr::str_sub(distid_no_era, -3, -2))

  # subset to the columns used in the merge and the analysis
  prcleg <- prcleg[, c("state", "dist", "distid", "icpsr", "era", "south",
                       "mean_pro_choice", "district_preference", "n_votes",
                       "at_large", "cong1", "cong2")]
  
# election data #########################################
  
  # Read the primary-election .dta file and hold it as a plain data frame.
  elec <- as.data.frame(read_dta("./cong_primary_1910_1936.dta"))

  # Drop everything after 1934, then label the two study windows:
  # 1910-1918 is "early" and 1928-1932 is "late". Rows that fall in neither
  # window keep a missing era and are removed.
  elec <- elec[elec$year <= 1934, ]
  elec$era <- NA
  elec$era[elec$year >= 1910 & elec$year <= 1918] <- "early"
  elec$era[elec$year >= 1928 & elec$year <= 1932] <- "late"
  elec <- elec[!is.na(elec$era), ]

  # Fix a few at-large district markers by recoding them to district 0:
  # any district number above 70, and every "MT" or "ID" row up to 1915.
  elec$dist[elec$dist > 70] <- 0
  elec$dist[(elec$state == "MT" | elec$state == "ID") & elec$year <= 1915] <- 0
    
  # Count the candidates in each primary race (one input row per candidate)
  # and attach that count to every candidate row.
  # NOTE(review): the count is grouped without `type`, while the vote total
  # below is grouped with it -- confirm that this difference is intended.
  elec_n_cand <- summarise(
    group_by(elec, state, year, office, dist, party),
    n_cand = dplyr::n()
  )
  elec <- merge(
    elec, elec_n_cand,
    by = c("state", "year", "office", "dist", "party"),
    all.x = TRUE
  )

  # Vote share = a candidate's primary votes over all primary votes cast in
  # the same race. Sole candidates with no recorded vote count are given one
  # vote first, which is just a hack so that they end up with 100% of the vote.
  elec$vote_p[is.na(elec$vote_p) & elec$n_cand == 1] <- 1

  elec_ag <- summarise(
    group_by(elec, state, year, office, dist, type, party),
    tot_vote_p = sum(vote_p)
  )
  elec <- merge(
    elec, elec_ag,
    by = c("state", "year", "office", "dist", "type", "party"),
    all.x = TRUE
  )

  elec$p_pct <- with(elec, vote_p / tot_vote_p)
  
  # Keep only the rows flagged as incumbents (inc equal to 1).
  elec <- elec[elec$inc == 1, ]

  # Derived columns:
  #   cong     - congress number computed from the election year as
  #              (year - 1786) / 2, so 1910 gives 62
  #   dem_elec - 1 when the party code is "D", 0 otherwise
  elec$cong <- (elec$year - 1786) / 2
  elec$dem_elec <- with(elec, ifelse(party == "D", 1, 0))

  # Carry forward only the columns needed for the merge and the regressions.
  elec <- elec[c("state", "year", "cong", "dist", "icpsr", "era",
                 "party", "dem_elec", "p_pct", "w_p")]
  
# merge ###########################################
  
  # Attach each incumbent's primary result to their legislator-era record.
  # We use an inner join because we only want observations that are in both
  # tables; rows are matched on state, icpsr, dist and era.
  dat <- inner_join(elec, prcleg, by = c("state", "icpsr", "dist", "era"))

    # the election data is by year, while the other data is by legislator. We therefore might end up with cases where we match
    # but the legislator is serving in a different *version* of a district (for instance, still district 1, but a new iteration of it).
    # we therefore only keep cases where the election's congress falls between the first and the last congress of the
    # legislator's district (both conditions must hold).
    #
    # cong1/cong2 are cut out of the district id as text, so they are converted to numbers here: comparing the numeric
    # `cong` with character values would silently fall back to a lexicographic string comparison.

    dat <- dat[dat$cong >= as.numeric(dat$cong1) & dat$cong <= as.numeric(dat$cong2), ]

    # calculate our main measure: the distance between a legislator's record and their constituents' preferences

    dat$distance <- abs(dat$district_preference - dat$mean_pro_choice)

    # make an indicator for whether an individual legislator is in the top quarter of distance.
    # the cut-off is the median of the observations that lie above the overall median.
    # NOTE(review): the cut-off is computed before the at-large districts are dropped below, so it is based on a
    # slightly larger sample than the one used in the regressions -- confirm that this is intended.

    distance_cutoff <- median(dat$distance[dat$distance > median(dat$distance, na.rm = TRUE)], na.rm = TRUE)
    dat$big_distance <- ifelse(dat$distance >= distance_cutoff, 1, 0)

    # make a legislator-district id variable (used as the clustering unit in the models)

    dat$icpsr_dist <- paste(dat$icpsr, dat$distid, sep = "_")

    # drop at-large districts

    dat <- dat[dat$at_large == 0, ]
  
# analysis ###############################################
    
# run regressions
    
  # All eight models share one specification: a single congruence measure as
  # the regressor, state and year fixed effects, no instruments (the `0` part)
  # and standard errors clustered on the legislator-district id.
  # Outcomes: w_p (labelled "Won" in the table) and p_pct ("Vote Share").
  # Regressors: distance (continuous) and big_distance (0/1 indicator).

  # pooled sample, continuous distance
  win <- felm(w_p ~ distance | state + year | 0 | icpsr_dist, data = dat)
  pct <- felm(p_pct ~ distance | state + year | 0 | icpsr_dist, data = dat)

  # pooled sample, high-distance indicator
  win2 <- felm(w_p ~ big_distance | state + year | 0 | icpsr_dist, data = dat)
  pct2 <- felm(p_pct ~ big_distance | state + year | 0 | icpsr_dist, data = dat)

  # southern districts only (south == 1), continuous distance
  win_s <- felm(w_p ~ distance | state + year | 0 | icpsr_dist,
                data = dat[dat$south == 1, ])
  pct_s <- felm(p_pct ~ distance | state + year | 0 | icpsr_dist,
                data = dat[dat$south == 1, ])

  # southern districts only (south == 1), high-distance indicator
  win2_s <- felm(w_p ~ big_distance | state + year | 0 | icpsr_dist,
                 data = dat[dat$south == 1, ])
  pct2_s <- felm(p_pct ~ big_distance | state + year | 0 | icpsr_dist,
                 data = dat[dat$south == 1, ])
  
# assemble table

  # One list per table panel: a title, the four fitted models in column order
  # (won, won, vote share, vote share) and the display names of the regressors.
  south_mods <- list(
    panel_title = "South Only",
    models = list(win_s, win2_s, pct_s, pct2_s),
    var_names = c("Distance on Prohibition", "Distant on Prohibition (0-1)")
  )

  full_mods <- list(
    panel_title = "Pooled",
    models = list(win, win2, pct, pct2),
    var_names = c("Distance on Prohibition", "Distant on Prohibition (0-1)")
  )

  # Write the two-panel LaTeX table with the project's multi_panel_table()
  # helper (loaded at the top of this script). The pooled panel comes first.
  # The note is one multi-line string literal; its line breaks and leading
  # spaces are part of the string, so those lines are left exactly as they were.
  multi_panel_table(
    panels = list(full_mods, south_mods),
    path = "./results/full_mods2.tex",
    dep_var_label = c("Won", "Won", "Vote Share", "Vote Share"),
    note_width = 0.575,
    order = c(1, 2),
    item_list = NULL,
    table_caption = "Prohibition Congruence and Primary Success for U.S. House Incumbents",
    table_font_size = "\\footnotesize",
    table_label = "full_mods",
    p_level = 0.05,
    note = "Entries are linear regression coefficients with 
                                        standard errors, clustered by legislator-district, in parentheses. All models include state and year fixed effects.
                                        \\textit{Distant on Prohibition (0-1)} is an indicator for whether a 
                                        legislator is in the highest quartile of \\textit{Distance on Prohibition}. $^*$p$<$0.05 (two-tailed test).")
    
    
    
    

    
    
    
   